#Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(leaps)
library(caret)
## Loading required package: lattice
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(mosaicData)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-2
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(scatterplot3d)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(rgl)
## This build of rgl does not include OpenGL functions. Use
## rglwidget() to display results, e.g. via options(rgl.printRglwidget = TRUE).
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Data source ----
# The data comes from the daily aggregate CSV (2016-2020) read below.
# Load the daily aggregate series and derive calendar helper columns from
# its `date` column in a single pass.
daily_aggregate <- read.csv("/home/ana/Documents/IMA Math Boot camp/ima2021_project-main/Data/2016_2020/aggragate_daily_data.csv") %>%
  mutate(
    year = year(date),
    month = month(date),
    day = day(date),
    # Position within the year, giving every month a fixed stride of 31 slots.
    month_day = 31 * (month - 1) + day
  )
# Water level across the year, one line per year.
ggplot(
  data = daily_aggregate,
  mapping = aes(x = month_day, y = water_level, color = factor(year))
) +
  geom_line()

# Energy generated across the year, one line per year.
ggplot(
  data = daily_aggregate,
  mapping = aes(x = month_day, y = energy_generated, color = factor(year))
) +
  geom_line()
# Running month counter (12 per year) so that successive years line up
# end-to-end on one axis.
daily_aggregate <- mutate(
  daily_aggregate,
  year_month = 12 * (year - 1) + month
)

# Water level along the running month counter, coloured by year.
ggplot(
  data = daily_aggregate,
  mapping = aes(x = year_month, y = water_level, color = factor(year))
) +
  geom_line()
# Visualizing data ----
# Smoothed trend of southeast affluent natural energy along the running
# month counter (smoothing method left to ggplot's default choice).
ggplot(
  data = daily_aggregate,
  mapping = aes(x = year_month, y = affluent_natural_energy_southeast)
) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Affluent natural energy against energy generated: points shaded by month,
# with a single black linear fit on top.
ggplot(
  data = daily_aggregate,
  mapping = aes(
    x = affluent_natural_energy_southeast,
    y = energy_generated,
    color = month
  )
) +
  geom_point(alpha = 0.8) +
  geom_smooth(method = "lm", color = "black")
## `geom_smooth()` using formula 'y ~ x'
# Maximum demand against energy generated: points shaded by month,
# with a single black linear fit on top.
ggplot(
  data = daily_aggregate,
  mapping = aes(x = maximum_demand, y = energy_generated, color = month)
) +
  geom_point(alpha = 0.8) +
  geom_smooth(method = "lm", color = "black")
## `geom_smooth()` using formula 'y ~ x'
# Affluent natural energy against maximum demand: points shaded by month,
# with a single black linear fit on top.
ggplot(
  data = daily_aggregate,
  mapping = aes(
    x = affluent_natural_energy_southeast,
    y = maximum_demand,
    color = month
  )
) +
  geom_point(alpha = 0.8) +
  geom_smooth(method = "lm", color = "black")
## `geom_smooth()` using formula 'y ~ x'
# Energy generated vs rain vs max temperature (daily) ----
# Importing weather information
weather <- read.csv("/home/ana/Documents/IMA Math Boot camp/ima2021_project-main/Data/historic_weather.csv")

# Collapse the raw readings to one row per date and derive calendar helpers.
#
# Missing readings are handled per variable instead of zero-filling the whole
# table: a blanket NA -> 0 replacement turns every missing temperature into a
# 0-degree reading, which then wins `min(temp_min)` (and `max(temp_max)` when
# the other readings are negative or absent).
#   * rain:     missing readings contribute nothing to the daily total
#               (same totals as the previous zero-fill).
#   * max/min:  computed over the available readings; a date with no valid
#               reading yields NA rather than a fabricated 0.
# NOTE(review): `weather` itself now keeps its NAs; confirm nothing further
# down the script relied on the zero-filled table.
daily_weather <- weather %>%
  group_by(date) %>%
  summarise(
    rain = sum(rain_mm, na.rm = TRUE),
    max_temp = if (all(is.na(temp_max))) NA_real_ else max(temp_max, na.rm = TRUE),
    temp_min = if (all(is.na(temp_min))) NA_real_ else min(temp_min, na.rm = TRUE)
  ) %>%
  mutate(
    year = year(date),
    month = month(date),
    day = day(date),
    # Position within the year, giving every month a fixed stride of 31 slots.
    month_day = 31 * (month - 1) + day
  )
#merge and clean
# Pair each day's weather with the generation record of the SAME calendar
# date. Joining on `month_day` alone matches a weather day against that
# calendar slot in every year of the generation data, which multiplies rows
# and pairs one year's weather with another year's energy output.
# `month_day` is a function of month and day, so listing it as a key does not
# change which rows match; it only keeps the column un-suffixed.
# NOTE(review): this assumes both files cover overlapping years; if they do
# not, the join returns no rows - confirm against the data.
DF <- merge(
  daily_weather,
  daily_aggregate,
  by = c("year", "month", "day", "month_day")
)

# Keep the historical `year.x` / `month.x` column names that the plots below
# rely on, and order rows chronologically so row-based operations such as
# lag() see consecutive days (merge() orders multi-column keys as text).
DF <- DF %>%
  filter(year > 2008) %>%
  arrange(year, month, day) %>%
  select(
    year.x = year, month.x = month, year_month, month_day,
    energy_generated, rain, max_temp, temp_min, maximum_demand, volume_used
  )
# Visualizing rain across the year.
# The x variable is the month_day index (31 * (month - 1) + day), not a month
# number, so ticks are placed at the first slot of each month and labelled
# with the month name; otherwise the "Month" axis would show values up to 372.
DF %>%
  ggplot(aes(x = month_day, y = rain)) +
  geom_smooth(color = "royalblue3") +
  scale_x_continuous(breaks = 31 * (0:11) + 1, labels = month.abb) +
  labs(x = "Month", y = "Rain (mm)", title = "Rain per month in Barra Bonita")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Smoothed maximum temperature by month number.
ggplot(data = DF, mapping = aes(x = month.x, y = max_temp)) +
  geom_smooth(color = "firebrick") +
  labs(
    title = "Maximum temperature per month in Barra Bonita",
    x = "Month",
    y = "Maximum temperature (C)"
  )
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Lagged rain against energy generated, shaded by maximum temperature.
# The rain column is shifted by 10 rows in DF's current row order, so the
# first 10 rows have no x value and are dropped when the plot is drawn.
# dplyr::lag is spelled out because stats::lag has the same name.
ggplot(
  data = DF,
  mapping = aes(
    x = dplyr::lag(rain, n = 10),
    y = energy_generated,
    color = max_temp
  )
) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", color = "black") +
  scale_color_gradient(low = "navyblue", high = "red3") +
  labs(
    title = "Rain vs Energy generated",
    x = "Rain -lag 10- (mm)",
    y = "Energy generated (GW)"
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
# Maximum temperature against energy generated, shaded by rainfall,
# with a single black linear fit on top.
ggplot(
  data = DF,
  mapping = aes(x = max_temp, y = energy_generated, color = rain)
) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", color = "black") +
  scale_color_gradient(low = "gold", high = "navyblue") +
  labs(
    title = "Maximum temperature vs Energy generated",
    x = "Maximum temperature (C)",
    y = "Energy generated (GW)"
  )
## `geom_smooth()` using formula 'y ~ x'
# 3D scatter of energy generated, rain and maximum temperature
# (columns 5 to 7 of DF, selected by name).
scatterplot3d(DF[, c("energy_generated", "rain", "max_temp")])

# Pairwise plot matrix over the six measurement columns of DF
# (columns 5 to 10, selected by name).
ggpairs(
  DF[, c(
    "energy_generated", "rain", "max_temp",
    "temp_min", "maximum_demand", "volume_used"
  )],
  mapping = aes(alpha = 0.4)
)